package com.jusyl.cristo.spider.lianjia;

import java.util.ArrayList;
import java.util.List;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} for Lianjia second-hand housing listing pages.
 *
 * <p>For every downloaded page it stores the {@link Page} itself under the
 * result key {@code "page"} and prints the text selected by the title XPath
 * to standard output. Run {@link #main(String[])} to start the crawl.
 */
public class WebMagicLianJia implements PageProcessor{

	/** Crawl settings shared by all requests: 3 retries, 1000 ms sleep between requests. */
	private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000);
	/** Key name for the total page count (not referenced within this class yet). */
	private static final String TOTAL_PAGE="totalPage";
	/** Key name for the current page number (not referenced within this class yet). */
	private static final String CUR_PAGE = "curPage";
	
	public WebMagicLianJia(){
	}

	/**
	 * Extracts data from a downloaded page.
	 *
	 * @param page the page handed over by the WebMagic downloader
	 */
	@Override
	public void process(Page page) {
		// Store the page itself in the result fields so it is available for analysis.
		page.putField("page", page);
		
		// TODO: collect all list pages that need to be crawled. The pagination
		// element "div.house-lst-page-box" exposes the attributes "page-url" and
		// "page-data" (the latter was meant to be parsed as JSON); they were read
		// here previously but never used, so the reads were dropped until the
		// paging logic is actually implemented.
		
		// Text of the link inside the <strong> of the entry title heading;
		// toString() yields null when nothing matches, which prints as "null".
		String info = page.getHtml().xpath("//h1[@class='entry-title public']/strong/a/text()").toString();
		
		System.out.println(info);
	}

	/**
	 * Returns the crawl settings used by the spider for this processor.
	 *
	 * @return the site configuration created at construction time
	 */
	@Override
	public Site getSite() {
		return site;
	}
	
	/**
	 * Starts the spider with a {@code LianJiaPipeline}, one seed URL and 5 threads,
	 * blocking until the crawl finishes.
	 *
	 * @param args command-line arguments (ignored)
	 */
	public static void main(String[] args) {
		Spider.create(new WebMagicLianJia())
			.addPipeline(new LianJiaPipeline())
			.addUrl("https://bj.lianjia.com/ershoufang/rs%E4%B8%89%E7%8E%AF%E6%96%B0%E5%9F%8E/")
			.thread(5).run();
	}
}
